
library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)
library(reshape2)

##set the working directory for the replication archive
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

source(paste0(working_dir,"Auxiliary Scripts/RegressionRunner.R"))

##load in the main datasets
railroads <- readRDS(paste0(working_dir,"Data/rails_waterways.RDS"))

##now we look at correlations over time
all_cors <- NULL
for (d in seq(1850,1890,by=10)){
  tmp <- railroads %>% filter(decade == d, !is.na(fips))
  res <- data.frame("decade" = d, "num_lags" = c("Lag = 1 decade", "Lag = 2 decades"), 
                    "corr_rail_post" = c( cor(log(tmp$rail_length + 1), log(tmp$post_lag1 + 1), use = "complete"),
                                          cor(log(tmp$rail_length + 1), log(tmp$post_lag2 + 1), use = "complete")),
                    "corr_post_rail" = c( cor(log(tmp$numPost + 1), log(tmp$rail_lag1 + 1), use = "complete"),
                                          cor(log(tmp$numPost + 1), log(tmp$rail_lag2 + 1), use = "complete")),
                    "lower_corr_rail_post" = c( cor.test(log(tmp$rail_length + 1), log(tmp$post_lag1 + 1))$conf.int[1],
                                                cor.test(log(tmp$rail_length + 1), log(tmp$post_lag2 + 1))$conf.int[1]),
                    "upper_corr_railPost" = c( cor.test(log(tmp$rail_length + 1), log(tmp$post_lag1 + 1))$conf.int[2],
                                                cor.test(log(tmp$rail_length + 1), log(tmp$post_lag2 + 1))$conf.int[2]),
                    "lower_corr_post_rail" = c( cor.test(log(tmp$numPost + 1), log(tmp$rail_lag1 + 1))$conf.int[1],
                                                cor.test(log(tmp$numPost + 1), log(tmp$rail_lag2 + 1))$conf.int[1]),
                    "upper_corr_post_rail" = c( cor.test(log(tmp$numPost + 1), log(tmp$rail_lag1 + 1))$conf.int[2],
                                                cor.test(log(tmp$numPost + 1), log(tmp$rail_lag2 + 1))$conf.int[2]),
                    stringsAsFactors = FALSE)
  all_cors <- rbind(all_cors, res)
}


all_cors_reshaped <- melt(all_cors[,1:4], id.vars = c("decade", "num_lags"))
colnames(all_cors_reshaped)[3:4] <- c("Direction of Correlation", "Correlation")
all_cors_reshaped$`Direction of Correlation` <- as.character(all_cors_reshaped$`Direction of Correlation`)

all_cors_reshaped$`Direction of Correlation`[all_cors_reshaped$`Direction of Correlation` == "corr_post_rail"] <-
  "Railroad Mileage (lagged) \nAnticipating Post Offices\n"

all_cors_reshaped$`Direction of Correlation`[all_cors_reshaped$`Direction of Correlation` == "corr_rail_post"] <-
  "Post Offices (lagged) \nAnticipating Railroad Mileage\n"

##add in the confidence intervals
all_cors_reshaped_lower <- melt(all_cors[,c(1,2,5,7)], id.vars = c("decade", "num_lags"))
all_cors_reshaped_upper <- melt(all_cors[,c(1,2,6,8)], id.vars = c("decade", "num_lags"))

all_cors_reshaped$Lower <- all_cors_reshaped_lower$value
all_cors_reshaped$Upper <- all_cors_reshaped_upper$value

rr_plot <-
  ggplot(all_cors_reshaped, aes(decade, Correlation, colour = `Direction of Correlation`)) +
  geom_point(size = 3, position=position_dodge(width=1.5)) +
  geom_errorbar(aes(x = decade, ymin = Lower, ymax = Upper), width = 0.1, position=position_dodge(width=1.5)) +
  xlab("Decade") +
  ylab(expression("Pearson's " ~ rho)) +
  facet_wrap( ~ num_lags) + 
  theme_bw()


ggsave(rr_plot, 
       file = paste0(working_dir,"Replication Plots/railroads_vs_POs.pdf"), 
       width = 12, height = 7)


